*** Think about quality measurement
*** Builds the estimation sample from the yearly import files, estimates
*** supplier quality (lambda) one HS4 sector at a time, and summarizes the results.

* Directory globals used by every use / merge / save command below.
* quality and suppdata end with a slash while china does not; all three are
* referenced as global-slash-filename, so the first two expand with a double slash.
global china ../china/data
global quality ../china/data/quality/
global suppdata ../china/suppdata/

*******************************************************
** Set up the sample for estimating supplier quality **
*******************************************************

* Stack the yearly import files (2002 through 2008) into one dataset.
use $china/imp2002china, clear
forvalues x=2003/2008 {
	append using $china/imp`x'china
}

* Records with zero value or zero quantity are removed before the unit value
* p = v/q is formed.
drop if v==0
drop if q==0
gen p=v/q

* Collapse to one observation per product (hs) x supplier (manuf_id) x year:
* p becomes the unweighted mean of the record-level unit values, the other
* variables are summed.
collapse (mean) p (sum) duty charges v swt q, by(hs manuf_id year)

* HS4 sector code = first four characters of the product code.
generate HS4 = substr(hs, 1, 4)

* Value totals at successively finer levels: year, product-year, sector-year,
* and supplier-product-year.
bysort year: egen tot_imp = total(v)
bysort year hs: egen product_exports = total(v)
bysort year HS4: egen sector_exports = total(v)
bysort year hs manuf_id: egen supplier_exports = total(v)

* Supplier share within its product-year (the nest share).
generate ns_vht = supplier_exports/product_exports

* Tag one observation per supplier-product-year, then count tagged varieties
* per supplier within an HS4-year and per product-year.
egen variety_tag = tag(hs manuf_id year)
bysort year manuf_id HS4: egen supplier_products = total(variety_tag)
bysort year hs: egen hs_suppliers = total(variety_tag)


/* Bring in the HS4-by-year aggregates needed for the "Outside share", s_0t.
   The using files are keyed on yr, so year is renamed for the merges and
   renamed back afterwards. */

rename year yr

* Exports and imports: only observations matched in both files are kept.
merge m:1 yr HS4 using $suppdata/HS4_exports, keepusing(HS4_exports) keep(match) nogenerate
merge m:1 yr HS4 using $suppdata/HS4_imports, keepusing(HS4_imports) keep(match) nogenerate

* Production: unmatched master observations are kept and get production 0;
* rows that exist only in the using file are discarded.
merge m:1 yr HS4 using $suppdata/HS4_production_BEA, keepusing(Production_HS4) keep(master match)
replace Production_HS4 = 0 if _merge == 1
drop _merge

rename yr year


* Import share of (production - exports + imports) for the HS4-year.
* The comparison below only removes negative shares; a missing share
* (zero denominator) is not less than zero in Stata and therefore stays.
gen import_share=(HS4_imports/(Production_HS4-HS4_exports+HS4_imports))
drop if import_share<0

* Outside share. It is non-positive whenever import_share is 1 or more.
gen s_0t = 1- import_share

* Smallest strictly positive outside share within each year: non-positive values
* are masked with the sentinel 100 (above any valid share, since s_0t is at most 1)
* so that they cannot win the minimum.
gen s_0t_pos = s_0t
replace s_0t_pos = 100 if s_0t<=0
bys year: egen min_s_0t_pos=min(s_0t_pos)

* Floor every non-positive outside share at that minimum. The test is <=0 so it
* matches the mask above; with <0 an outside share of exactly 0 was left in place
* and produced a missing log outside share further down.
* NOTE(review): if a year has no positive outside share at all, the floor equals
* the sentinel 100 - confirm this cannot occur in the data.
replace s_0t = min_s_0t_pos if s_0t<=0

/* Remaining variables that enter the quality estimation */

* Market size implied by sector imports and the outside share, and the
* supplier share of that market.
generate MKT = sector_exports/(1 - s_0t)
generate s_vht = supplier_exports/MKT

* Natural logs of the nest share, the market share and the outside share.
foreach sh in ns_vht s_vht s_0t {
	generate log_`sh' = ln(`sh')
}

* Dependent variable: log market share minus log outside share.
generate share_diff = log_s_vht - log_s_0t


/* Restrict the sample to HS4 sectors with at least 10 observations and more
   than one distinct product code (hs) inside the sector */
bysort HS4: generate hs4_nobs = _N
drop if hs4_nobs < 10

* One tagged observation per distinct hs code, counted within HS4.
egen hs10_tag = tag(hs)
bysort HS4: egen hs10_in_hs4 = total(hs10_tag)
drop if hs10_in_hs4 == 1

* Consecutive integer id for each remaining HS4 sector; the estimation loop
* iterates over this id.
egen pglist = group(HS4)

save $quality/quality_est, replace

*******************************************
** Quality estimation begins here *********
*******************************************

* Reload the estimation sample and store the highest sector id in local max,
* which bounds every sector loop that follows.
use $quality/quality_est, clear
summarize pglist, meanonly
local max=`r(max)'
display `max'



forvalues x=1/`max' {
	display `x'
	use $quality/quality_est if pglist==`x', clear

	/* Instruments built per unit of quantity: charges and shipping weight */
	generate charge_per_unit = charges/q
	generate swt_per_unit = swt/q

	/* Trim unit values outside the 1st-99th percentile range of this sector */
	_pctile p, percentiles(1 99)
	local p_lo = r(r1)
	local p_hi = r(r2)
	drop if p > `p_hi' | p < `p_lo'

	/* Drop years left with fewer than two observations */
	bysort year: generate year_nobs = _N
	drop if year_nobs < 2

	/* Numeric supplier id, declared as the panel variable for the xt commands */
	egen manuf_id_num = group(manuf_id)
	xtset manuf_id_num

/* OLS and IV demand estimates plus diagnostics for the current sector.
   Everything runs inside capture so that one failing sector does not stop the
   loop; a failure leaves the variables created after the failing command
   undefined for this sector. The return code is reported after the block. */
capture {

	/* First do OLS regression, and report coefficients */
	xi: xtreg share_diff p log_ns_vht i.year, fe
	gen p_coeff_ols=_b[p]
	gen mkt_share_coeff_ols=_b[log_ns_vht]


	/* Next do IV regression, and report coefficients and elasticity */
	xi: xtivreg share_diff (p log_ns_vht= charge_per_unit swt_per_unit hs_suppliers supplier_products) i.year, fe
	gen p_coeff_iv=_b[p]
	gen mkt_share_coeff=_b[log_ns_vht]

	gen elast = p_coeff_iv*p*(1-s_vht)
	egen elast_med = median(elast)
	egen elast_mean = mean(elast)

	/* Overidentifying restriction test.
	   xtoverid does not work well when an instrument has no variation, so
	   supplier_products is left out of the instrument list when its standard
	   deviation is exactly zero. The earlier version also branched on the
	   standard deviation of duty, but both duty branches ran the same
	   command, so that check had no effect and has been removed. */
	summ supplier_products
	local sp_sd=`r(sd)'
	local overid_ivs charge_per_unit swt_per_unit hs_suppliers
	if `sp_sd'!=0 {
		local overid_ivs `overid_ivs' supplier_products
	}
	xi: xtivreg share_diff (p log_ns_vht= `overid_ivs') i.year, fe
	cap xtoverid
	cap local y=r(jp)

	/* P-value for test of overidentifying restrictions */
	cap gen overid_pval=`y'

	/* P-values built from the F statistics of the two first-stage regressions.
	   NOTE(review): e(F) is the F statistic of the xtreg model as run, which
	   also contains the year dummies - confirm this is the intended statistic. */
	xi: xtreg p charge_per_unit swt_per_unit hs_suppliers supplier_products i.year, fe
	gen f_stat_price_pval = Ftail(e(df_b)-1,e(df_r), e(F) )

	xi: xtreg log_ns_vht charge_per_unit swt_per_unit hs_suppliers supplier_products i.year, fe
	gen f_stat_nestshare_pval = Ftail(e(df_b)-1,e(df_r), e(F) )

	/* R2 and observation count, read from the most recent xtreg
	   (the log_ns_vht first stage) */
	local r2=e(r2)
	local nobs=e(N)
	gen r2=`r2'
	gen nobs=`nobs'

	/* Correlation between market share and nest share */
	corr s_vht ns_vht
	local corr=r(rho)
	gen share_nestshare_corr=`corr'
}
if _rc {
	display as text "HS4 group `x': OLS/IV diagnostics block stopped early, return code " _rc
}


/* Recovering a quality estimate (lambda) for every supplier observation.

   The regression below absorbs a supplier effect (lambda1) and a year effect
   (lambda2) and leaves lambda3 as the residual. Per the original note, lambda2
   can be missing for suppliers observed in a single year, which would make
   lambda1+lambda2+lambda3 missing for them. The workaround:
   - lambda_hat is the dependent variable minus the fitted price and nest-share
     terms, so it never depends on lambda1 or lambda2;
   - lambda_diff is lambda1+lambda2+lambda3 minus lambda_hat, which is either
     one common number or missing;
   - the mean of lambda_diff is added back to lambda_hat, giving a lambda for
     every observation.
   The block runs under capture: if any command fails, the keep and the save of
   the full supplier-level file are skipped for this sector. */

capture {
	reghdfe share_diff (p log_ns_vht= charge_per_unit swt_per_unit hs_suppliers supplier_products), absorb(lambda1=manuf_id_num lambda2=year) ivsuite(ivregress) old
	predict lambda3, resid

	generate lambda_hat = share_diff - _b[p]*p - _b[log_ns_vht]*log_ns_vht
	generate lambda_diff = lambda1 + lambda2 + lambda3 - lambda_hat
	egen lambda_diff_mean = mean(lambda_diff)
	generate lambda = lambda_hat + lambda_diff_mean

	/* Standard errors from this regression */
	generate p_se = _se[p]
	generate mkt_share_se = _se[log_ns_vht]

	/* t-statistic of the price coefficient: xtivreg coefficient over this se */
	generate p_tstat = p_coeff_iv / p_se

	/* t-statistic from a one-sample test of lambda against zero */
	ttest lambda=0
	local lambda_t=r(t)
	generate tstat = `lambda_t'

	keep lambda hs manuf_id year ///
		p_coeff_ols mkt_share_coeff_ols p_coeff_iv p_se mkt_share_coeff ///
		overid_pval f_stat_price_pval f_stat_nestshare_pval ///
		tstat r2 nobs p_tstat share_nestshare_corr elast_med elast_mean

	/* Supplier-level file for this sector (all observations, lambda only) */
	preserve
	keep lambda hs manuf_id year
	save $quality/quality_est_full_hs4_`x', replace
	restore
}
/* Make another version for summarizing key statistics across HS4 sectors */
/* Only the first observation is kept. The coefficient and diagnostic variables
   created above are generated as constants, so one row carries them for the
   whole sector. If the capture block above failed, its keep never ran and this
   row still holds every variable that was in memory. */
keep in 1
save $quality/quality_est_hs4_`x', replace
}
** End of loop over HS4 categories **

***********************************************************
* Make the data set for SUMMARIZING Quality Stats- drops all supplier-specific observations
***********************************************************

* Stack the one-row sector files into a single summary dataset.
use $quality/quality_est_hs4_1, clear
forvalues s=2/`max' {
	append using $quality/quality_est_hs4_`s'
}
save $quality/quality_est_summary, replace

* The per-sector one-row files are no longer needed.
forvalues s=1/`max' {
	erase $quality/quality_est_hs4_`s'.dta
}


* Trim sectors whose IV price coefficient lies outside its 1st-99th percentile.
* A missing p_coeff_iv compares as larger than the upper cutoff, so sectors
* without an IV estimate are removed by the first drop as well.
use $quality/quality_est_summary, clear
_pctile p_coeff_iv, p(1 99)
local p_coeff_iv_high=r(r2)
local p_coeff_iv_low=r(r1)
drop if p_coeff_iv>`p_coeff_iv_high'
drop if p_coeff_iv<`p_coeff_iv_low'

* SS = 1 when the price t-statistic is at least 1.96 in absolute value.
* Stata treats a missing value as larger than any number, so the previous
* test p_tstat>=1.96 flagged sectors with no t-statistic as significant.
* SS is now left missing for those sectors; they drop out of tab and of
* total_SS_nobs because the product nobs*SS is missing for them.
gen SS=(abs(p_tstat)>=1.96) if !missing(p_tstat)
tab SS

* Observation counts: in significant sectors, and overall.
gen SS_nobs=nobs*SS
egen total_SS_nobs=total(SS_nobs)
egen total_nobs=total(nobs)

*********************************
** Table 3 Statistics *******
*********************************
* Price coefficients: OLS, then IV.
foreach stat in p_coeff_ols p_coeff_iv {
	summarize `stat', detail
}

* Elasticities: all sectors, then only sectors with a negative IV price coefficient.
foreach stat in elast_med elast_mean {
	summarize `stat', detail
	summarize `stat' if p_coeff_iv<0, detail
}

* Test p-values, nest-share coefficients, R2 and observation counts.
foreach stat in overid_pval f_stat_price_pval f_stat_nestshare_pval mkt_share_coeff_ols mkt_share_coeff r2 nobs {
	summarize `stat', detail
}

* Totals are constant across rows, so the short summary is enough.
summarize total_SS_nobs
summarize total_nobs


***********************************************************
* Make the data set containing ALL Quality Stats- keep all supplier-specific observations
***********************************************************

* Stack the supplier-level sector files. A sector file exists only when the
* quality block for that sector ran to completion, so every file, including the
* first one, is checked before it is appended. Starting from an empty dataset
* removes the hard dependency on sector 1 having been estimated.
clear
forvalues x=1/`max' {
	capture confirm file "$quality/quality_est_full_hs4_`x'.dta"
	if _rc==0 {
		append using $quality/quality_est_full_hs4_`x'
	}
}

save $quality/quality_est_final, replace

* Remove the per-sector files. capture keeps a sector without a file from
* aborting the clean-up, matching the tolerance used when appending.
forvalues x=1/`max' {
	capture erase $quality/quality_est_full_hs4_`x'.dta
}


** Application : How does lambda change over time
* Load the supplier-level estimates and trim lambda to its 5th-95th percentile range.
use hs manuf_id year lambda using $quality/quality_est_final, clear
_pctile lambda, p(5 95)
local lambda_high=r(r2)
local lambda_low=r(r1)
drop if lambda>`lambda_high'
drop if lambda<`lambda_low'

bysort hs manuf_id: egen lambda_sd_supplier_overtime=sd(lambda)
bysort hs year: egen lambda_sd_withinyear=sd(lambda)

/* Standard deviation of quality changes over time */
summ lambda_sd_supplier_overtime, d
/* Standard deviation of quality within a year */
summ lambda_sd_withinyear, d


/* How often is the change in quality over time less than 1 unit */
* Observations are ordered by year within each supplier-product before the lag
* is taken; without (year) the order inside a group is arbitrary and the
* difference is not a change over time. The lag is the previous observed year
* for that supplier-product, which need not be the preceding calendar year.
bysort hs manuf_id (year): gen lambda_change=lambda-lambda[_n-1]
* The first observation of each group has no change; the indicator is left
* missing there instead of being coded 0.
gen lambda_change_sub1=(abs(lambda_change)<1) if !missing(lambda_change)


